import pandas as pd
# NOTE: the two lines below are stray console output that was pasted into the
# source; they are kept as comments so the file parses as Python.
# Intel MKL WARNING: Support of Intel(R) Streaming SIMD Extensions 4.2 (Intel(R) SSE4.2) enabled only processors has been deprecated. Intel oneAPI Math Kernel Library 2025.0 will require Intel(R) Advanced Vector Extensions (Intel(R) AVX) instructions.
# Intel MKL WARNING: Support of Intel(R) Streaming SIMD Extensions 4.2 (Intel(R) SSE4.2) enabled only processors has been deprecated. Intel oneAPI Math Kernel Library 2025.0 will require Intel(R) Advanced Vector Extensions (Intel(R) AVX) instructions.
import plotly.express as px

# --- Chart 1: biology degrees conferred, totalled per race/ethnicity ---------
# Reads the concatenated awards CSV, keeps the selected race/ethnicity
# categories and major for fiscal years 2021-2023, sums the
# "Awards/Degrees Conferred" column per category and shows a horizontal bar
# chart.
df = pd.read_csv('Concatenated_Data.csv')

# Race/ethnicity categories (values of the "Race and Ethnicity" column) to plot.
selected_races = [
    'Hispanic or Latino',
    'Black or African American',
    'American Indian or Alaska Native',
    'Asian',
    'Native Hawaiian or Other Pacific Islander',
    'Two or more races',
]

# Values of the "Major" column to include.
selected_majors = ['26.01 - Biology, General.']

# The chart title advertises 2021–2023, so the rows must be limited to those
# fiscal years; this mirrors the "Fiscal Year" filter applied by the top-10
# institutions chart later in this file. Without it, every year present in
# the CSV would be summed under a title that claims a three-year window.
# `between` is inclusive on both ends.
filtered_df = df[
    df['Fiscal Year'].between(2021, 2023)
    & df['Race and Ethnicity'].isin(selected_races)
    & df['Major'].isin(selected_majors)
]

# One row per race/ethnicity category with the summed degree count.
grouped_df = (
    filtered_df
    .groupby(['Race and Ethnicity'])['Awards/Degrees Conferred']
    .sum()
    .reset_index()
)

fig = px.bar(
    grouped_df,
    x='Awards/Degrees Conferred',
    y='Race and Ethnicity',
    orientation='h',
    color='Race and Ethnicity',
    color_discrete_sequence=px.colors.qualitative.D3,
    hover_data={
        'Awards/Degrees Conferred': True,
        'Race and Ethnicity': True,
    },
    title='Earned Doctorates in Biology (2021–2023)',
)

# The legend is hidden because each bar is already labelled on the y axis.
fig.update_layout(
    xaxis_title='Degrees Conferred',
    yaxis_title='Race',
    showlegend=False,
    template='plotly_white',
    title_font=dict(size=16),
)

fig.show()
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px        


# --- Chart 2: biology degrees conferred, totalled per value of "Sex" ---------
# Reads the concatenated awards CSV, keeps the selected major for fiscal years
# 2021-2023, sums "Awards/Degrees Conferred" per (Major, Sex) pair and shows a
# horizontal bar chart.
df = pd.read_csv("Concatenated_Data.csv")

# NOTE(review): despite the `afro_` prefix, these names hold the general
# biology major only; the names are kept so any later code that refers to
# them keeps working.
afro_majors = [
    '26.01 - Biology, General.',
]

# The chart title advertises 2021–2023, so restrict rows to those fiscal
# years; this mirrors the "Fiscal Year" filter applied by the top-10
# institutions chart later in this file. `between` is inclusive on both ends.
in_scope = df["Major"].isin(afro_majors) & df["Fiscal Year"].between(2021, 2023)

afro_df = (
    df[in_scope]
      .groupby(["Major", "Sex"])["Awards/Degrees Conferred"]
      .sum()
      .reset_index()
)

majors = afro_df["Major"].unique()
if len(majors) == 0:
    # The title below indexes majors[0]; fail with an explanatory message
    # instead of a bare IndexError when the filters match no rows.
    raise ValueError(
        "No rows in Concatenated_Data.csv match the selected majors "
        "for fiscal years 2021-2023."
    )

palette = px.colors.qualitative.Set1
fig = go.Figure()

for i, maj in enumerate(majors):
    sub = afro_df[afro_df["Major"] == maj]
    fig.add_trace(
        go.Bar(
            x=sub["Awards/Degrees Conferred"],
            y=sub["Sex"],
            orientation="h",
            # One palette colour per bar (per row of `sub`).
            marker_color=palette[: len(sub)],
            name=maj,
            # Only the first major's trace is shown. This figure defines no
            # dropdown/buttons, so traces for any additional majors would
            # stay hidden.
            visible=(i == 0),
        )
    )

fig.update_layout(
    template="plotly_white",
    xaxis_title="Degrees Conferred",
    yaxis_title="Gender",
    showlegend=False,
    title=f"Earned Doctorates in {majors[0]} by Gender (2021–2023)",
    transition={"duration": 500},
)

fig.show()
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px


# --- Chart 3: top-10 institutions by biology degrees, with a race dropdown ---
# Builds one top-10 table for all selected races combined ("Overall") and one
# per selected race, then draws one bar trace per table and a dropdown that
# switches which trace is visible.


def _top_institutions(frame, label, n=10):
    """Return the `n` institutions with the largest degree totals in `frame`.

    The result has the columns "Institution Name", "Degrees" and
    "Race Group", where "Race Group" is set to `label` on every row.
    """
    totals = frame.groupby("Institution Name")["Awards/Degrees Conferred"].sum()
    table = totals.nlargest(n).reset_index(name="Degrees")
    table["Race Group"] = label
    return table


df = pd.read_csv("Concatenated_Data.csv")

# Keep fiscal years 2021 through 2023 (both ends inclusive).
df = df[df["Fiscal Year"].between(2021, 2023)]

selected_majors = ["26.01 - Biology, General."]
selected_races = [
    "Hispanic or Latino",
    "Black or African American",
    "American Indian or Alaska Native",
    "Asian",
    "Native Hawaiian or Other Pacific Islander",
    "Two or more races",
]

# Rows for the selected major AND one of the selected races.
is_selected_major = df["Major"].isin(selected_majors)
is_selected_race = df["Race and Ethnicity"].isin(selected_races)
bio_df = df.loc[is_selected_major & is_selected_race]

# "Overall" ranks institutions across all selected races together; the
# per-race tables rank them within a single race.
overall_tbl = _top_institutions(bio_df, "Overall")
race_tables = [
    _top_institutions(bio_df[bio_df["Race and Ethnicity"] == race_name], race_name)
    for race_name in selected_races
]

full_tbl = pd.concat([overall_tbl, *race_tables], ignore_index=True)

# Plotly bar chart: one trace per race group, toggled by a dropdown.
palette = px.colors.qualitative.D3
groups = full_tbl["Race Group"].unique()
group_count = len(groups)

fig = go.Figure()

for idx, label in enumerate(groups):
    rows = full_tbl.loc[full_tbl["Race Group"] == label]
    bar = go.Bar(
        x=rows["Degrees"],
        y=rows["Institution Name"],
        orientation="h",
        # Cycle through the palette if there are more groups than colours.
        marker_color=palette[idx % len(palette)],
        name=label,
        # Only the first group is shown until the dropdown is used.
        visible=idx == 0,
    )
    fig.add_trace(bar)

# One dropdown entry per group: show only that group's trace and retitle.
buttons = [
    dict(
        label=label,
        method="update",
        args=[
            {"visible": [pos == idx for pos in range(group_count)]},
            {"title":
             f"Top-10 Institutions – {label} Biology Doctorates (FY 2021-2023)"},
        ],
    )
    for idx, label in enumerate(groups)
]

dropdown = dict(
    type="dropdown",
    buttons=buttons,
    active=0,
    x=1.02,
    y=1,
    xanchor="left",
    yanchor="top",
    bgcolor="white",
    bordercolor="gray",
)

fig.update_layout(
    updatemenus=[dropdown],
    template="plotly_white",
    xaxis_title="Doctorates Conferred",
    yaxis_title="Institution",
    showlegend=False,
    title="Top-10 Institutions – Overall Biology Doctorates (FY 2021-2023)",
    height=520,
    transition={"duration": 500},
)

fig.show()